/*
 * TextHandler.java
 *
 * Created on 2 September 2006, 9:20
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */

package fasea.index.filehandler;

import java.io.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

/**
 * File handler for plain-text files: reads the file line by line and exposes
 * its text as the "content" field of a Lucene document.
 *
 * @author gvasil
 */
class TXTHandler extends FileHandler {

  /** Matches a run of one or more whitespace characters; compiled once and reused for every line. */
  private static final java.util.regex.Pattern WHITESPACE_RUN =
          java.util.regex.Pattern.compile("\\s+");

  /**
   * Builds the Lucene document for a plain-text file.
   *
   * The document is obtained from {@code createDefaultLuceneDocument(file)}.
   * If the file contains any non-whitespace text, that text is added as a
   * "content" field that is analyzed, not stored, and carries term vectors
   * with offsets. The underlying file stream is always closed before this
   * method returns, whether it succeeds or fails.
   *
   * @param file the text file to read
   * @return the document for the file; it has no "content" field when the
   *         file is empty or contains only whitespace
   * @throws FileHandlerException if the file cannot be opened or an I/O
   *         error occurs while reading it
   */
  public Document getTypedDocument(File file) throws FileHandlerException {
      InputStream is;
      try {
          is = new FileInputStream(file);
      }
      catch (FileNotFoundException e) {
          throw new FileHandlerException(
                  "File not found: "
                  + file.getAbsolutePath(), e);
      }

      // Everything after the stream is opened runs under try/finally so the
      // file handle is released even if document creation or reading fails.
      try {
          Document doc = createDefaultLuceneDocument(file);

          String text;
          try {
              text = getText(is);
          }
          catch (IOException e) {
              throw new FileHandlerException("Cannot read the text document", e);
          }

          // getText() never returns null and already trims its result, so a
          // length check is enough to skip empty / whitespace-only files.
          if (text.length() > 0) {
              doc.add(new Field("content", text, Field.Store.NO,
                      Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
          }
          return doc;
      }
      finally {
          try {
              is.close();
          }
          catch (IOException ignored) {
              // The stream was only read from; a failure while closing it
              // cannot affect the text already extracted, and must not mask
              // an exception that is already propagating.
          }
      }
  }

  /**
   * Reads all text from the stream, collapsing every run of whitespace inside
   * a line to a single space and joining the lines with '\n'.
   *
   * The stream is not closed here; the caller owns it and closes it.
   *
   * @param is the stream to read from
   * @return the normalized text with leading and trailing whitespace removed;
   *         never null, empty if the stream has no non-whitespace content
   * @throws IOException if reading from the stream fails
   */
  private String getText(InputStream is) throws IOException {
      StringBuilder str = new StringBuilder();

      // NOTE(review): bytes are decoded with the platform default charset, so
      // the extracted text depends on the JVM's configuration. Kept as-is to
      // avoid changing how existing files are decoded; consider an explicit
      // charset.
      BufferedReader br = new BufferedReader(new InputStreamReader(is));
      String line;
      while ((line = br.readLine()) != null) {
          str.append(WHITESPACE_RUN.matcher(line).replaceAll(" ")).append('\n');
      }

      return str.toString().trim();
  }
}